In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import datetime
import xgboost as xgb
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
from plotly.offline import iplot
from datetime import date
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
In [2]:
circuits = pd.read_csv('circuits.csv')
constructor_results = pd.read_csv('constructor_results.csv')
constructors_standings = pd.read_csv('constructor_standings.csv')
constructors = pd.read_csv('constructors.csv')
drivers_standings = pd.read_csv('driver_standings.csv')
drivers = pd.read_csv('drivers.csv')
lap_times = pd.read_csv('lap_times.csv')
pit_stops = pd.read_csv('pit_stops.csv')
qualifying = pd.read_csv('qualifying.csv')
races = pd.read_csv('races.csv')
results = pd.read_csv('results.csv')
seasons = pd.read_csv('seasons.csv')
status = pd.read_csv('status.csv')
In [3]:
def getMiliSeconds(time):
    try:
        if '.' in time:
            x = datetime.datetime.strptime(time, '%M:%S.%f')
        elif ',' in time:
            x = datetime.datetime.strptime(time, '%M:%S,%f')
        else:
            x = datetime.datetime.strptime(time, '%M:%S:%f')
        return datetime.timedelta(minutes = x.minute, seconds = x.second, microseconds = x.microsecond).total_seconds()
    except:
        x = datetime.datetime.strptime(str(time).split('.')[0],'%M:%S:%f')
        return datetime.timedelta(minutes = x.minute, seconds = x.second, microseconds = x.microsecond).total_seconds()

def timefn(x):
    if (x.find(':') != -1):
        return getMiliSeconds(x)
    else:
        return x

results['fastestLapTime'] = results['fastestLapTime'].replace('0','00:00.0')
qualifying['q1'] = qualifying['q1'].replace('0','00:00.0')
qualifying['q2'] = qualifying['q2'].replace('0','00:00.0')
qualifying['q3'] = qualifying['q3'].replace('0','00:00.0')

qualifying['q1'] = qualifying['q1'].apply(lambda x: getMiliSeconds(x))
qualifying['q2'] = qualifying['q2'].apply(lambda x: getMiliSeconds(x))
qualifying['q3'] = qualifying['q3'].apply(lambda x: getMiliSeconds(x))
lap_times['time'] = lap_times['time'].apply(lambda x: getMiliSeconds(x))
results['fastestLapTime'] = results['fastestLapTime'].apply(lambda x: getMiliSeconds(x))

pit_stops['duration'] = pit_stops['duration'].apply(lambda x: timefn(x))

drivers['fullName'] = drivers['forename'] + " " + drivers['surname']
In [4]:
data = pd.merge(results, qualifying, on = ['raceId', 'driverId','constructorId'], how = 'left')
data = pd.merge(data, races, on = 'raceId', how = 'left')
data = pd.merge(data, drivers, on = 'driverId', how = 'left')
data = pd.merge(data, constructors, on = 'constructorId', how = 'left')
data = pd.merge(data, circuits, on = 'circuitId', how = 'left')
data = data[['year', 'round', 'circuitId', 'circuitRef', 'name', 'name_x', 'lat', 'lng', 'resultId', 'raceId', 'driverId', 'driverRef',
             'fullName', 'dob', 'nationality_x', 'constructorId', 'constructorRef', 'name_y', 'nationality_y',
             'q1', 'q2', 'q3','grid', 'position_x', 'positionOrder', 'laps', 'milliseconds', 'fastestLap', 'rank', 'fastestLapTime',
             'fastestLapSpeed', 'statusId']]
data = data.rename(columns = {'round':'roundNum', 'name': 'circuitName', 'name_x': 'GrandPrix', 'nationality_x': 'driverNationality',
                              'name_y': 'constructor', 'nationality_y': 'constructorNationality', 'position_x': 'finishingPos', 
                              'rank':'fastestLapRank', 'milliseconds':'raceTime'}, inplace = False)
data = pd.merge(data, constructors_standings, on = ['constructorId', 'raceId'], how = 'left')
data = data.rename(columns = {'points': 'constructorPoints', 'position': 'constructorPosition', 'wins': 'constructorWins'}, inplace = False)
data = data.drop(['constructorStandingsId', 'positionText'], axis = 1, inplace = False)
data = pd.merge(data, drivers_standings, on = ['driverId', 'raceId'], how = 'left')
data = data.rename(columns = {'points': 'driverPoints', 'position': 'driverPosition', 'wins': 'driverWins'}, inplace = False)
data = data.drop(['driverStandingsId', 'positionText'], axis = 1, inplace = False)
pit_stop_count = pd.DataFrame(pit_stops.groupby(['raceId', 'driverId']).count())
data = pd.merge(data, pit_stop_count, on = ['raceId','driverId'], how = 'left')
data = data.drop(['lap', 'time', 'duration', 'milliseconds'], axis = 1, inplace = False)
pit_stops['duration'] = pit_stops['duration'].astype(float)
pit_stop_duration_mean = pit_stops.groupby(['raceId', 'driverId']).mean()
data = pd.merge(data, pit_stop_duration_mean, on = ['raceId','driverId'], how = 'left')
data = data.rename(columns = {'stop_x': 'pitStops', 'duration': 'pitStopDuration'}, inplace = False)
data = data.drop(['stop_y', 'lap', 'milliseconds'], axis = 1, inplace = False)
data['birth_year'] = pd.DatetimeIndex(data['dob']).year
data['dob'] =  data.year - data.birth_year
data = data.rename(columns = {'dob':'age'}, inplace = False)
data = data.drop(['birth_year'], axis = 1, inplace = False)
data = data[['year', 'roundNum', 'circuitId', 'circuitRef', 'circuitName', 'lat', 'lng', 'GrandPrix', 'resultId', 'raceId',
             'driverId', 'driverRef', 'fullName', 'age', 'driverNationality', 'constructorId', 'constructorRef',
             'constructor', 'constructorNationality', 'q1', 'q2', 'q3', 'grid', 'finishingPos', 'positionOrder',
             'laps', 'raceTime', 'fastestLap', 'fastestLapRank', 'fastestLapTime', 'fastestLapSpeed', 'statusId', 'pitStops', 'pitStopDuration',
             'constructorPoints', 'constructorPosition', 'constructorWins','driverPoints', 'driverPosition', 'driverWins']]

def agecorrection(x):
        if x < 10:
            return 59
        else:
            return x

data['age'] = data['age'].apply(lambda x: agecorrection(x))

data = data.sort_values(['year','roundNum'])
pd.set_option('display.max_columns', 500)
data.head()
Out[4]:
year roundNum circuitId circuitRef circuitName lat lng GrandPrix resultId raceId driverId driverRef fullName age driverNationality constructorId constructorRef constructor constructorNationality q1 q2 q3 grid finishingPos positionOrder laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins
20024 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20025 833 642 farina Nino Farina 59 Italian 51 alfa Alfa Romeo Italian NaN NaN NaN 1 1 1 70 8003600 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 9.0 1.0 1.0
20025 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20026 833 786 fagioli Luigi Fagioli 52 Italian 51 alfa Alfa Romeo Italian NaN NaN NaN 2 2 2 70 8006200 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 6.0 2.0 0.0
20026 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20027 833 686 reg_parnell Reg Parnell 59 British 51 alfa Alfa Romeo Italian NaN NaN NaN 4 3 3 70 8055600 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 4.0 3.0 0.0
20027 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20028 833 704 cabantous Yves Cabantous 59 French 154 lago Talbot-Lago French NaN NaN NaN 6 4 4 68 0 0 0 0.0 0.0 12 NaN NaN NaN NaN NaN 3.0 4.0 0.0
20028 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20029 833 627 rosier Louis Rosier 59 French 154 lago Talbot-Lago French NaN NaN NaN 9 5 5 68 0 0 0 0.0 0.0 12 NaN NaN NaN NaN NaN 2.0 5.0 0.0
In [5]:
data.tail()
Out[5]:
year roundNum circuitId circuitRef circuitName lat lng GrandPrix resultId raceId driverId driverRef fullName age driverNationality constructorId constructorRef constructor constructorNationality q1 q2 q3 grid finishingPos positionOrder laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins
24955 2020 17 24 yas_marina Yas Marina Circuit 24.4672 54.6031 Abu Dhabi Grand Prix 24961 1047 841 giovinazzi Antonio Giovinazzi 27 Italian 51 alfa Alfa Romeo Italian 97.1 98.2 0.0 14 16 16 54 0 29 7 101.7 196.650 11 1.0 21.480000 8.0 8.0 0.0 4.0 17.0 0.0
24956 2020 17 24 yas_marina Yas Marina Circuit 24.4672 54.6031 Abu Dhabi Grand Prix 24962 1047 849 latifi Nicholas Latifi 25 Canadian 3 williams Williams British 98.4 0.0 0.0 18 17 17 54 0 49 16 102.5 195.073 11 2.0 21.870500 0.0 10.0 0.0 0.0 21.0 0.0
24957 2020 17 24 yas_marina Yas Marina Circuit 24.4672 54.6031 Abu Dhabi Grand Prix 24963 1047 825 kevin_magnussen Kevin Magnussen 28 Danish 210 haas Haas F1 Team American 97.9 0.0 0.0 20 18 18 54 0 50 13 102.0 196.025 11 2.0 22.906000 3.0 9.0 0.0 1.0 20.0 0.0
24958 2020 17 24 yas_marina Yas Marina Circuit 24.4672 54.6031 Abu Dhabi Grand Prix 24964 1047 850 pietro_fittipaldi Pietro Fittipaldi 24 Brazilian 210 haas Haas F1 Team American 98.2 0.0 0.0 17 19 19 53 0 50 8 101.7 196.588 12 3.0 26.015667 3.0 9.0 0.0 0.0 23.0 0.0
24959 2020 17 24 yas_marina Yas Marina Circuit 24.4672 54.6031 Abu Dhabi Grand Prix 24965 1047 815 perez Sergio Pérez 30 Mexican 211 racing_point Racing Point British 96.0 0.0 0.0 19 0 20 8 0 6 19 103.3 193.625 7 NaN NaN 195.0 4.0 1.0 125.0 4.0 1.0
In [6]:
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 24960 entries, 20024 to 24959
Data columns (total 40 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   year                    24960 non-null  int64  
 1   roundNum                24960 non-null  int64  
 2   circuitId               24960 non-null  int64  
 3   circuitRef              24960 non-null  object 
 4   circuitName             24960 non-null  object 
 5   lat                     24960 non-null  float64
 6   lng                     24960 non-null  float64
 7   GrandPrix               24960 non-null  object 
 8   resultId                24960 non-null  int64  
 9   raceId                  24960 non-null  int64  
 10  driverId                24960 non-null  int64  
 11  driverRef               24960 non-null  object 
 12  fullName                24960 non-null  object 
 13  age                     24960 non-null  int64  
 14  driverNationality       24960 non-null  object 
 15  constructorId           24960 non-null  int64  
 16  constructorRef          24960 non-null  object 
 17  constructor             24960 non-null  object 
 18  constructorNationality  24960 non-null  object 
 19  q1                      8684 non-null   float64
 20  q2                      8684 non-null   float64
 21  q3                      8684 non-null   float64
 22  grid                    24960 non-null  int64  
 23  finishingPos            24960 non-null  int64  
 24  positionOrder           24960 non-null  int64  
 25  laps                    24960 non-null  int64  
 26  raceTime                24960 non-null  int64  
 27  fastestLap              24960 non-null  int64  
 28  fastestLapRank          24960 non-null  int64  
 29  fastestLapTime          24960 non-null  float64
 30  fastestLapSpeed         24960 non-null  float64
 31  statusId                24960 non-null  int64  
 32  pitStops                3894 non-null   float64
 33  pitStopDuration         3894 non-null   float64
 34  constructorPoints       23085 non-null  float64
 35  constructorPosition     23085 non-null  float64
 36  constructorWins         23085 non-null  float64
 37  driverPoints            24479 non-null  float64
 38  driverPosition          24479 non-null  float64
 39  driverWins              24479 non-null  float64
dtypes: float64(15), int64(16), object(9)
memory usage: 7.8+ MB
In [7]:
data.head()
Out[7]:
year roundNum circuitId circuitRef circuitName lat lng GrandPrix resultId raceId driverId driverRef fullName age driverNationality constructorId constructorRef constructor constructorNationality q1 q2 q3 grid finishingPos positionOrder laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins
20024 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20025 833 642 farina Nino Farina 59 Italian 51 alfa Alfa Romeo Italian NaN NaN NaN 1 1 1 70 8003600 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 9.0 1.0 1.0
20025 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20026 833 786 fagioli Luigi Fagioli 52 Italian 51 alfa Alfa Romeo Italian NaN NaN NaN 2 2 2 70 8006200 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 6.0 2.0 0.0
20026 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20027 833 686 reg_parnell Reg Parnell 59 British 51 alfa Alfa Romeo Italian NaN NaN NaN 4 3 3 70 8055600 0 0 0.0 0.0 1 NaN NaN NaN NaN NaN 4.0 3.0 0.0
20027 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20028 833 704 cabantous Yves Cabantous 59 French 154 lago Talbot-Lago French NaN NaN NaN 6 4 4 68 0 0 0 0.0 0.0 12 NaN NaN NaN NaN NaN 3.0 4.0 0.0
20028 1950 1 9 silverstone Silverstone Circuit 52.0786 -1.01694 British Grand Prix 20029 833 627 rosier Louis Rosier 59 French 154 lago Talbot-Lago French NaN NaN NaN 9 5 5 68 0 0 0 0.0 0.0 12 NaN NaN NaN NaN NaN 2.0 5.0 0.0
In [8]:
data = data.replace(np.nan, 0)
In [9]:
data_10 = data.copy()
data_10 = data_10[data_10['year'] > 2009]
data_10.head()
Out[9]:
year roundNum circuitId circuitRef circuitName lat lng GrandPrix resultId raceId driverId driverRef fullName age driverNationality constructorId constructorRef constructor constructorNationality q1 q2 q3 grid finishingPos positionOrder laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins
20320 2010 1 3 bahrain Bahrain International Circuit 26.0325 50.5106 Bahrain Grand Prix 20323 337 4 alonso Fernando Alonso 29 Spanish 6 ferrari Ferrari Italian 114.6 114.2 114.6 3 1 1 49 5960396 45 1 118.3 191.706 1 0.0 0.0 43.0 1.0 1.0 25.0 1.0 1.0
20321 2010 1 3 bahrain Bahrain International Circuit 26.0325 50.5106 Bahrain Grand Prix 20324 337 13 massa Felipe Massa 29 Brazilian 6 ferrari Ferrari Italian 115.3 114.3 114.2 2 2 2 49 5976495 38 5 119.7 189.392 1 0.0 0.0 43.0 1.0 1.0 18.0 2.0 0.0
20322 2010 1 3 bahrain Bahrain International Circuit 26.0325 50.5106 Bahrain Grand Prix 20325 337 1 hamilton Lewis Hamilton 25 British 1 mclaren McLaren British 115.3 114.7 115.2 4 3 3 49 5983578 42 4 119.6 189.665 1 0.0 0.0 21.0 2.0 0.0 15.0 3.0 0.0
20323 2010 1 3 bahrain Bahrain International Circuit 26.0325 50.5106 Bahrain Grand Prix 20326 337 20 vettel Sebastian Vettel 23 German 9 red_bull Red Bull Austrian 115.0 113.9 114.1 1 4 4 49 5999195 32 12 120.2 188.627 1 0.0 0.0 16.0 4.0 0.0 12.0 4.0 0.0
20324 2010 1 3 bahrain Bahrain International Circuit 26.0325 50.5106 Bahrain Grand Prix 20327 337 3 rosberg Nico Rosberg 25 German 131 mercedes Mercedes German 115.5 114.7 115.2 5 5 5 49 6000609 45 13 120.2 188.599 1 0.0 0.0 18.0 3.0 0.0 10.0 5.0 0.0

Exploratory Data Analysis

Data analysis of the various race cuircuits, constructors and drivers

In [10]:
tracks = [{'lat': circuits['lat'], 'lon': circuits['lng'],
         'marker': {'color': 'orangered','line': {'color': 'black', 'width': 0.5}, 'size': 10,
                    'sizemode': 'diameter'}, 'text': '0.387', 'type': 'scattergeo'},]

layout = go.Layout(title = 'Circuits World Location', showlegend = False,
                   geo = dict(scope='world', projection = dict( type = 'natural earth'), showland = True,
                              landcolor = 'navajowhite', subunitwidth = 1, countrywidth = 1, 
                              subunitcolor = "navajowhite", countrycolor = "navajowhite"),
                   paper_bgcolor = 'white', plot_bgcolor = 'lightgrey')

fig =  go.Figure(layout = layout, data = tracks)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, validate = False)
In [11]:
races_count = races['name'].value_counts()
trace = go.Pie(labels = races_count.index, values = races_count.values, hole = 0.6, textinfo = "none")
layout = go.Layout(title = 'Most number of Grand Prix', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [12]:
dangerousCircuits = data.loc[(data['statusId'] == 3) | (data['statusId'] == 4)]['circuitName'].value_counts()[:20]

plt.style.use('dark_background')
fig, axs = plt.subplots(figsize=(20, 8))
axs.tick_params(axis="x", labelsize = 15)
axs.tick_params(axis="y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.barplot(dangerousCircuits.index, dangerousCircuits.values)
plt.title("20 circuit with the most number of accidents", fontsize = 20)
plt.xlabel('Circuit', fontsize = 15)
plt.ylabel('Count', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [13]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize=(20, 8))
axs.tick_params(axis="x", labelsize = 15)
axs.tick_params(axis="y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'circuitName', y = 'fastestLapSpeed', data = data_10)
plt.ylim(100, 300)
plt.title("Circuit vs Fastest Lap Speeds - Last 10 Years", fontsize = 20)
plt.xlabel('Circuit', fontsize = 15)
plt.ylabel('Km/H', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [14]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'year', y = 'fastestLapSpeed', data = data_10)
plt.ylim(100, 300)
plt.title("Fastest Lap Speeds over Last 10 Years", fontsize = 20)
plt.xlabel('Year', fontsize = 15)
plt.ylabel('Km/H', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()

We can see that the lap speeds have increased obviously over the last 10 years. However, we see that post 2014, the average speeds have risen faster per year post the 1.6 V6 Turbo Engine rule change. Could be due to the MGU-K and MGU-H introduction

In [15]:
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'circuitName', y = 'fastestLapTime', data = data_10)
plt.ylim(50, 150)
plt.title("Circuit vs Fastest Lap Times - Last 10 Years", fontsize = 20)
plt.xlabel('Circuit', fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [16]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'year', y = 'fastestLapTime', data = data_10)
plt.ylim(50, 175)
plt.title("Fastest Lap Times over Last 10 Years", fontsize = 20)
plt.xlabel('Year', fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [17]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'year', y = 'pitStopDuration', data = data_10)
plt.ylim(10, 60)
plt.title("Average pit stop times over Last 10 Years", fontsize = 20)
plt.xlabel('Year',fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()

Oddly, average pit stop durations have increased over the years

In [18]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'circuitName', y = 'pitStopDuration', data = data_10)
plt.ylim(10, 60)
plt.title("Average pit stop times over Last 10 Years - by circuit", fontsize = 20)
plt.xlabel('Circuit',fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [19]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxenplot(x = 'circuitName', y = 'pitStops', data = data_10)
plt.title("Average number of pit stops by circuit", fontsize = 20)
plt.xlabel('Circuit',fontsize = 15)
plt.ylabel('Number of stops', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [20]:
constructors_count = data.drop_duplicates(subset = ["constructorId"])
constructors_count = constructors_count['constructorNationality'].value_counts()
trace = go.Pie(labels = constructors_count.index, values = constructors_count.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Percentage of Constructors by Nationality', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [21]:
constructors_count = data.drop_duplicates(subset = ["constructorId"])
fig = px.sunburst(constructors_count, path = ['constructorNationality', 'constructor'])
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
fig.update_layout(go.Layout(title = 'Teams by Nationality', paper_bgcolor = 'gainsboro'))
In [22]:
drivers_count = data.drop_duplicates(subset = ["driverId"])
drivers_count = drivers_count['driverNationality'].value_counts()
trace = go.Pie(labels = drivers_count.index, values = drivers_count.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Percentage of Drivers by Nationality', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [23]:
driver_age = data.drop_duplicates(subset = ["driverId"])
driver_age = driver_age[driver_age['year'] >= 1968]
driver_age_dist_count = driver_age['age'].value_counts()

trace = go.Pie(labels = driver_age_dist_count.index, values = driver_age_dist_count.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Age wise race debutants', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")

young_debutants = driver_age[driver_age['age'] <= 21]
young_debutants = young_debutants[['fullName', 'age', 'year', 'GrandPrix', 'constructor']]
fig = px.sunburst(young_debutants, path = ['age', 'fullName'])
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
fig.update_layout(go.Layout(title = 'Young Debutants', paper_bgcolor = 'gainsboro'))

Hamilton's Performance Analysis

In [24]:
polesitters = data.loc[data['grid'] == 1].groupby('fullName')['grid'].count().sort_values(ascending = False)[:20]

fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 15)

sns.barplot(list(polesitters.index), list(polesitters.values))
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.title("Most number of pole positions (Top 20)", fontsize = 20)
plt.ylabel('Count', fontsize = 20)
plt.xticks(rotation = 45, ha = 'right')
plt.xlabel('Driver', fontsize = 20)
plt.show()
In [25]:
ham_wins = data.loc[(data['driverId'] == 1) & (data['positionOrder'] == 1)].groupby('GrandPrix')['driverId'].count().sort_values(ascending = False)

fig, axs = plt.subplots(figsize = (10, 15))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 15)

sns.barplot(list(ham_wins.values), list(ham_wins.index))
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.title("Victory by Grand Prix", fontsize = 20)
plt.ylabel('Gran Prix', fontsize = 20)
plt.xlabel('Total', fontsize = 20)
plt.show()

Interestingly HAM has won more Hungarian GPs than on homeground at Silverstone (British GP)

Let's see how he performs compared to some other legends

In [26]:
topBritish = data.loc[(data['driverNationality'] == 'British') & 
                                       (data['positionOrder'] == 1)].groupby('fullName')['raceId'].count().sort_values(ascending=False)[:10]

topWorld = data.loc[(data['positionOrder'] == 1)].groupby('fullName')['raceId'].count().sort_values(ascending=False)[:20]

fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(topBritish.index), list(topBritish.values))
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.title("Top British Race Winners", fontsize=20)
plt.ylabel('Drivers', fontsize = 15)
plt.xlabel('Total Wins', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()

fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(topWorld.index), list(topWorld.values))
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.xticks(rotation = 45, ha = 'right')
plt.title("Top Race Winners - World", fontsize = 20)
plt.ylabel('Drivers', fontsize = 15)
plt.xlabel('Total Wins', fontsize = 15)
plt.show()

Hamilton has surpassed Schumacher in the most number of wins.

In [27]:
f_laps = data.copy()
f_laps = f_laps[['raceId', 'year', 'fullName', 'fastestLapTime']].sort_values(['year', 'raceId'])
f_laps = f_laps[f_laps['fastestLapTime'] != 0]
f_laps = f_laps.drop(['year'], axis = 1)

y = range(1, data.raceId.max()+1)
counts = pd.DataFrame()
for x in y:
    counts = counts.append(f_laps.loc[f_laps['raceId'] == x].sort_values(['fastestLapTime'])[:1])

counts = counts.groupby('fullName').count().sort_values(['fastestLapTime'], ascending = False)[:15]
counts
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(counts.index), counts['fastestLapTime'])
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.xticks(rotation = 45, ha = 'right')
plt.title("Most number of fastest laps", fontsize = 30)
plt.ylabel('Total Number of Fastest Laps', fontsize = 15)
plt.xlabel('Drivers', fontsize = 15)
plt.show()
In [28]:
f_laps_speed = data.copy()
f_laps_speed = f_laps_speed[['raceId', 'year', 'fullName', 'fastestLapSpeed']].sort_values(['year', 'raceId'])
f_laps_speed = f_laps_speed[f_laps_speed['fastestLapSpeed'] != 0]
f_laps_speed = f_laps_speed.drop(['year'], axis = 1)

y = range(1, data.raceId.max()+1)
counts_speed = pd.DataFrame()
for x in y:
    counts_speed = counts_speed.append(f_laps_speed.loc[f_laps_speed['raceId'] == x].sort_values(['fastestLapSpeed'], ascending = False)[:1])

counts_speed = counts_speed.groupby('fullName').count().sort_values(['fastestLapSpeed'], ascending = False)[:15]
counts_speed
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 20)
axs.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(counts_speed.index), counts_speed['fastestLapSpeed'])
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.xticks(rotation = 45, ha = 'right')
plt.title("Fastest Drivers on track - Top Speed", fontsize = 30)
plt.ylabel('x Times Fastest Driver on track', fontsize = 15)
plt.xlabel('Drivers', fontsize = 15)
plt.show()
In [29]:
def wins_per_year(driverRef, year):
    try:
        return topTenYears.loc[(topTenYears['driverRef'] == driverRef) & (topTenYears['positionOrder'] == 1) & 
                               (topTenYears['year'] == year)].groupby('driverId')['raceId'].count().values[0]
    except:
        return 0

topTenYears = data.copy()
hamWinsPerYear = []
vetWinsPerYear = []
rosWinsPerYear = []
butWinsPerYear = []
aloWinsPerYear = []
botWinsPerYear = []

x = (2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020)

for year in x:
    hamWinsPerYear.append(wins_per_year('hamilton',year))
    vetWinsPerYear.append(wins_per_year('vettel',year))
    rosWinsPerYear.append(wins_per_year('rosberg',year))
    butWinsPerYear.append(wins_per_year('button',year))
    aloWinsPerYear.append(wins_per_year('alonso',year))
    botWinsPerYear.append(wins_per_year('bottas',year))

fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)

line1, = axs.plot(x, hamWinsPerYear, label = 'Hamilton', color='c', linewidth = 6,)
line2, = axs.plot(x, vetWinsPerYear, label = 'Vettel', color = 'r', linewidth = 2)
line3, = axs.plot(x, rosWinsPerYear, label = 'Rosberg', color = 'darkcyan', linewidth = 2)
line4, = axs.plot(x, butWinsPerYear, label = 'Button', color = 'gold', linewidth = 2)
line5, = axs.plot(x, aloWinsPerYear, label = 'Alonso', color = 'deeppink', linewidth = 2)
line5, = axs.plot(x, botWinsPerYear, label = 'Bottas', color = 'darkcyan', linewidth = 2)
plt.title("Top 5 from the Last 10 years - Wins per Year ", fontsize = 20)
plt.ylabel('Total', fontsize = 15)
plt.xlabel('Years', fontsize = 15)
axs.legend()
plt.show()
In [30]:
def championsInYears(years, driverRef):
    total = []
    t = 0
    for year in years:
        winner = topTenYears.loc[topTenYears['year'] == year].groupby('driverRef')['driverPoints'].sum().sort_values(ascending = False).index[0]
        if winner == driverRef:
            t = t + 1
        total.append(t)
    return total

schumiCP = championsInYears(np.sort(topTenYears['year'].unique()), 'michael_schumacher')
fangioCP = championsInYears(np.sort(topTenYears['year'].unique()), 'fangio')
prostCP = championsInYears(np.sort(topTenYears['year'].unique()), 'prost')
hamiltonCP = championsInYears(np.sort(topTenYears['year'].unique()), 'hamilton')
vettelCP = championsInYears(np.sort(topTenYears['year'].unique()), 'vettel')

x = range(1950, 2021)

fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)

line1, = axs.plot(x, schumiCP, label = 'Schumacher World Championships Count', color = 'r', linewidth = 2)
line2, = axs.plot(x, fangioCP, label = 'Fangio World Championships Count', color = 'magenta', linewidth = 2)
line3, = axs.plot(x, prostCP, label = 'Prost World Championships Count', color ='darkcyan', linewidth = 2)
line4, = axs.plot(x, hamiltonCP, label = 'Hammilton World Championships Count', color = 'aqua', linewidth = 4)
line5, = axs.plot(x, vettelCP, label = 'Vettel World Championships Count', color = 'darkblue', linewidth= 2)
plt.title("Top 5 from All Time - Championships over the Years ", fontsize = 20)
plt.ylabel('Total', fontsize = 15)
plt.xlabel('Years', fontsize = 15)
axs.legend()
plt.show()

Hamilton Rosberg Rivalry

In [31]:
mercedesId = constructors.loc[(constructors['constructorRef'] == 'mercedes')]['constructorId'].values[0]
hamQtdQualy = len(qualifying.loc[(qualifying['driverId'] == 1) & (qualifying['position'] == 1) & (qualifying['constructorId'] == mercedesId)])
rosQtdQualy = len(qualifying.loc[(qualifying['driverId'] == 3 ) & (qualifying['position'] == 1) & (qualifying['constructorId'] == mercedesId)])
hamQtdRaces = len(results.loc[(results['positionOrder'] == 1) & (results['driverId'] == 1) & (results['constructorId'] == 131)])
rosQtdRaces = len(results.loc[(results['positionOrder'] == 1) & (results['driverId'] == 3) & (results['constructorId'] == 131)])
hamWinsPerYear = data.loc[(data['driverId']==1) & (data['constructorId'] == 131) &
                          (data['positionOrder'] == 1) & (data['year'] < 2017)].groupby('year')['driverId'].count()
rosWinsPerYear = data.loc[(data['driverId']==3) & (data['constructorId'] == 131) &
                          (data['positionOrder'] == 1) & (data['year'] > 2012)].groupby('year')['driverId'].count()

fig, axs = plt.subplots(figsize = (16, 6))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)

sns.barplot(['Hamilton - Qualy', 'Rosberg - Qualy', 'Hamilton - Wins', 'Rosberg - Wins'], [hamQtdQualy, rosQtdQualy, hamQtdRaces, rosQtdRaces])
axs.set_facecolor('k')
plt.title("Total Poles and wins in Mercedes", fontsize = 20)
plt.ylabel('Total', fontsize = 15)
plt.xlabel('Drivers', fontsize = 15)
plt.show()

x = list(hamWinsPerYear.index)
y = hamWinsPerYear
f,ax = plt.subplots(figsize = (20, 10))
ax.tick_params(axis="x", labelsize = 15)
ax.tick_params(axis="y", labelsize = 15)

ax.set_facecolor('k')
line1, = ax.plot(x, hamWinsPerYear.values, label = 'Hamilton Wins', color = 'c', linewidth = 4)
line2, = ax.plot(x, rosWinsPerYear.values, label = 'Rosberg Wins' ,linewidth = 4, color = 'lightgray')
plt.title("Total Wins per Year in Mercedes", fontsize = 20)
plt.ylabel('Total', fontsize = 15)
plt.xlabel('Years', fontsize = 15)
axs.legend()
plt.show()
No handles with labels found to put in legend.

Iconic races for Hamilton

Suzuka GP 2015

In [32]:
timeHamilton = lap_times.loc[((lap_times['driverId'] == 1) & (lap_times['raceId'] == 940))]['time'].iloc[:].values
timeRos = lap_times.loc[(lap_times['driverId'] == 3) & (lap_times['raceId'] == 940)]['time'].iloc[:].values
timeVet = lap_times.loc[(lap_times['driverId'] == 20) & (lap_times['raceId'] == 940)]['time'].iloc[:].values

x = range(0, 53)
fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)

axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
line1, = axs.plot(x, timeHamilton, label= 'Hamilton', color = 'darkcyan', linewidth = 4)
line2, = axs.plot(x, timeRos, label = 'Rosberg', color = 'darkgreen' , linewidth = 4)
line2, = axs.plot(x, timeVet, label = 'Vettel', color = 'darkred' , linewidth = 4)
plt.title("Suzuka GP - Time Laps Comparison - 2014 ", fontsize = 20)
plt.ylabel('Total Seconds(s)', fontsize = 15)
plt.xlabel('Lap', fontsize = 15)
axs.legend()
plt.show()

Monza GP 2011

Biggest battle between Hamilton and the 7 time world champ Schumacher

In [33]:
timeHamilton = lap_times.loc[((lap_times['driverId'] == 1) & (lap_times['raceId'] == 853))]['time'].iloc[:].values
timeSchumi = lap_times.loc[(lap_times['driverId'] == 30) & (lap_times['raceId'] == 853)]['time'].iloc[:].values
x = range(0, 53)
fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)

axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
line1, = axs.plot(x, timeHamilton, label = 'Hamilton', color = 'cyan', linewidth = 4)
line2, = axs.plot(x, timeSchumi, label = 'Schumacher' , color = 'red', linewidth = 4)
plt.title("Monza GP - Lap Times Comparison - 2011 ", fontsize = 20)
plt.ylabel('Total Seconds(s)', fontsize = 15)
plt.xlabel('Lap', fontsize = 15)
ax.legend()
plt.show()

positionsHamilton = lap_times.loc[((lap_times['driverId'] == 1) & (lap_times['raceId'] == 853))]['position'].iloc[:].values
positionsSchumi = lap_times.loc[(lap_times['driverId'] == 30) & (lap_times['raceId'] == 853)]['position'].iloc[:].values

x = range(0, 53)
fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)

axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
line1, = axs.plot(x, positionsHamilton, label = 'Hamilton', color = 'cyan', linewidth = 4,)
line2, = axs.plot(x, positionsSchumi, label = 'Schumacher', color = 'red' , linewidth = 4,)
plt.title("Monza Gran Prix 2011- Fight for position", fontsize = 20)
plt.ylabel('Position', fontsize = 15)
plt.xlabel('Lap', fontsize = 15)
axs.legend()
plt.ylim(10, 1)
plt.show()

British GP - 2020

Hamilton's last lap = Lightning McQueen

In [34]:
timeHamilton = lap_times.loc[((lap_times['driverId'] == 1) & (lap_times['raceId'] == 1034))]['time'].iloc[:].values
timeVerst = lap_times.loc[(lap_times['driverId'] == 830) & (lap_times['raceId'] == 1034)]['time'].iloc[:].values
x = range(0, 52)
fig, axs = plt.subplots(figsize = (20, 10))
ax.tick_params(axis = "x", labelsize = 15)
ax.tick_params(axis = "y", labelsize = 20)

axs.set_facecolor('whitesmoke')
axs.patch.set_alpha(0.9)
line1, = axs.plot(x, timeHamilton, label = 'Hamilton', color = 'k', linewidth = 7)
line2, = axs.plot(x, timeVerst, label = 'Verstappen', color = 'blue', linewidth = 4)
plt.title("Silverstone GP - Lap Times Comparison - 2020 ", fontsize = 20)
plt.ylabel('Total Seconds(s)', fontsize = 15)
plt.xlabel('Lap', fontsize = 15)
axs.legend()
plt.show()

Turkish GP - 2020

Hamiltons's 7th Title winning race

In [35]:
timeHamilton = lap_times.loc[((lap_times['driverId'] == 1) & (lap_times['raceId'] == 1044))]['time'].iloc[:].values
timePerez = lap_times.loc[(lap_times['driverId'] == 815) & (lap_times['raceId'] == 1044)]['time'].iloc[:].values
timeVet = lap_times.loc[(lap_times['driverId'] == 20) & (lap_times['raceId'] == 1044)]['time'].iloc[:].values

timeStroll = lap_times.loc[(lap_times['driverId'] == 840) & (lap_times['raceId'] == 1044)]['time'].iloc[:].values


x = range(0, 58)
fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)

axs.set_facecolor('whitesmoke')
axs.patch.set_alpha(0.9)
line1, = axs.plot(x, timeHamilton, label = 'Hamilton', color = 'k', linewidth = 7)
line2, = axs.plot(x, timePerez, label = 'Perez' , color = 'hotpink', linewidth = 4)
line3, = axs.plot(x, timeVet, label = 'Vettel' , color = 'Red', linewidth = 4)
line4, = axs.plot(x, timeStroll, label = 'Stroll' , color = 'm', linewidth = 3)

plt.title("Turkish GP - Lap Times Comparison - 2020 ", fontsize = 20)
plt.ylabel('Total Seconds(s)', fontsize = 20)
plt.xlabel('Lap', fontsize = 15)
axs.legend()
plt.show()
In [36]:
racesQualysHam = data_10.loc[(data_10['driverRef'] == 'hamilton') & (data_10['year'] == 2019) & (data_10['q3'] != 0)]

qualTimes = racesQualysHam['q3'].values
raceTimes = racesQualysHam['fastestLapTime'].values
x = racesQualysHam.GrandPrix.values

fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)

axs.set_facecolor('whitesmoke')
line1, = axs.plot(x, raceTimes, 'bs', c = 'red',label = "Race Time")
line2, = axs.plot(x, qualTimes,'bs', c = 'blue', label = "Qualy time")
plt.title("Race vs Qualification - 2019", fontsize = 20)
plt.ylabel('Time (s)', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
axs.legend()
plt.show()
In [37]:
racesQualysHam = data_10.loc[(data_10['driverRef'] == 'hamilton') & (data_10['year'] == 2020) & (data_10['q3'] != 0)]

qualTimes = racesQualysHam['q3'].values
raceTimes = racesQualysHam['fastestLapTime'].values
x = racesQualysHam.GrandPrix.values

fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)

axs.set_facecolor('whitesmoke')
line1, = axs.plot(x, raceTimes, 'bs', c = 'red',label = "Race Time")
line2, = axs.plot(x, qualTimes,'bs', c = 'blue', label = "Qualy time")
plt.title("Race vs Qualification - 2020", fontsize = 20)
plt.ylabel('Time (s)', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
axs.legend()
plt.show()
In [38]:
winsHam = data.loc[(data['driverRef']=='hamilton')]['grid'].value_counts()

trace = go.Pie(labels = winsHam.index, values = winsHam.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Hamilton - Percentage of entered races vs race wins', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")

winsSenna = data.loc[(data['driverRef']=='senna')]['grid'].value_counts()

trace = go.Pie(labels = winsSenna.index, values = winsSenna.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Senna - Percentage of entered races vs race wins', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")

winsSchumi = data.loc[(data['driverRef']=='michael_schumacher')]['grid'].value_counts()

trace = go.Pie(labels = winsSchumi.index, values = winsSchumi.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Schumacher - Percentage of entered races vs race wins', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [39]:
mclarensRacers = data.loc[(data['constructorId'] == 1) &
                                 (data['positionOrder'] == 1)].groupby('fullName')['raceId'].count().sort_values(ascending = False)[:10]

mercedesRacers = data.loc[(data['constructorId'] == 131) &
                                 (data['positionOrder'] == 1)].groupby('fullName')['raceId'].count().sort_values(ascending = False)[:10]

fig, axs = plt.subplots(figsize = (20, 10))
ax.tick_params(axis = "x", labelsize = 15)
ax.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(mclarensRacers.values), list(mclarensRacers.index))
sns.color_palette("hls", 8)
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.title("Top McLaren Race Winners", fontsize = 20)
plt.ylabel('Drivers', fontsize = 15)
plt.xlabel('Total Wins', fontsize = 15)
plt.show()

fig, axs = plt.subplots(figsize = (20, 10))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)

sns.barplot(list(mercedesRacers.values), list(mercedesRacers.index))
sns.color_palette("hls", 8)
axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
plt.title("Top Mercedes Race Winners", fontsize = 20)
plt.ylabel('Drivers', fontsize = 15)
plt.xlabel('Total Wins', fontsize = 15)
plt.show()

Race Stats Comparison between 2019 and 2020 season

In [40]:
data_19 = data[data['year'] == 2019]
data_20 = data[data['year'] == 2020]
In [41]:
driver_age = data_19.drop_duplicates(subset = ["driverId"])
age_count = driver_age['age'].value_counts()

trace = go.Pie(labels = age_count.index, values = age_count.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Age distribution across the grid - 2019', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [42]:
driver_age = data_20.drop_duplicates(subset = ["driverId"])
age_count = driver_age['age'].value_counts()

trace = go.Pie(labels = age_count.index, values = age_count.values, hole = 0.6,textinfo = "none")
layout = go.Layout(title = 'Age distribution across the grid - 2020', paper_bgcolor = 'gainsboro')
fig = go.Figure(data = [trace], layout = layout)
fig.update_layout(margin = dict(l = 60, r = 60, t = 50, b = 50))
iplot(fig, filename = "plotting-library")
In [43]:
speedHam = (data_19.loc[((data_19['driverId'] == 1))]['fastestLapSpeed'].iloc[:].values.mean())
speedBot = (data_19.loc[((data_19['driverId'] == 822))]['fastestLapSpeed'].iloc[:].values.mean())
speedVet = (data_19.loc[((data_19['driverId'] == 20))]['fastestLapSpeed'].iloc[:].values.mean())
speedLec = (data_19.loc[((data_19['driverId'] == 844))]['fastestLapSpeed'].iloc[:].values.mean())
speedVer = (data_19.loc[((data_19['driverId'] == 830))]['fastestLapSpeed'].iloc[:].values.mean())
speedAlb = (data_19.loc[((data_19['driverId'] == 848))]['fastestLapSpeed'].iloc[:].values.mean())
speedNor = (data_19.loc[((data_19['driverId'] == 846))]['fastestLapSpeed'].iloc[:].values.mean())
speedSai = (data_19.loc[((data_19['driverId'] == 832))]['fastestLapSpeed'].iloc[:].values.mean())
speedRic = (data_19.loc[((data_19['driverId'] == 817))]['fastestLapSpeed'].iloc[:].values.mean())
speedHul = (data_19.loc[((data_19['driverId'] == 807))]['fastestLapSpeed'].iloc[:].values.mean())
speedGas = (data_19.loc[((data_19['driverId'] == 842))]['fastestLapSpeed'].iloc[:].values.mean())
speedKyv = (data_19.loc[((data_19['driverId'] == 826))]['fastestLapSpeed'].iloc[:].values.mean())
speedPerez = (data_19.loc[((data_19['driverId'] == 815))]['fastestLapSpeed'].iloc[:].values.mean())
speedStr = (data_19.loc[((data_19['driverId'] == 840))]['fastestLapSpeed'].iloc[:].values.mean())
speedRai = (data_19.loc[((data_19['driverId'] == 8))]['fastestLapSpeed'].iloc[:].values.mean())
speedGio = (data_19.loc[((data_19['driverId'] == 841))]['fastestLapSpeed'].iloc[:].values.mean())
speedMag = (data_19.loc[((data_19['driverId'] == 825))]['fastestLapSpeed'].iloc[:].values.mean())
speedGro = (data_19.loc[((data_19['driverId'] == 154))]['fastestLapSpeed'].iloc[:].values.mean())
speedKub = (data_19.loc[((data_19['driverId'] == 9))]['fastestLapSpeed'].iloc[:].values.mean())
speedRus = (data_19.loc[((data_19['driverId'] == 847))]['fastestLapSpeed'].iloc[:].values.mean())

fig, axs = plt.subplots(figsize = (20, 7))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)
plt.xticks(rotation = 45)

axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
line1, = axs.bar('Hamilton', speedHam, color = 'c')
line2, = axs.bar('Bottas', speedBot, color = 'c')
line3, = axs.bar('Vettel', speedVet, color = 'r')
line4, = axs.bar('Leclerc', speedLec, color = 'r')
line5, = axs.bar('Verstappen', speedVer, color = 'navy')
line6, = axs.bar('Albon', speedAlb, color = 'navy')
line7, = axs.bar('Norris', speedNor, color = 'orange')
line8, = axs.bar('Sainz', speedSai, color = 'orange')
line9, = axs.bar('Ricciardo', speedRic, color = 'yellow')
line10, = axs.bar('Hulkenberg', speedHul, color = 'yellow')
line11, = axs.bar('Gasly', speedGas, color = 'white')
line12, = axs.bar('Kvyat', speedKyv, color = 'white')
line13, = axs.bar('Perez', speedPerez, color = 'lightpink')
line14, = axs.bar('Stroll', speedStr, color = 'lightpink')
line15, = axs.bar('Räikkönen', speedRai, color = 'darkred')
line16, = axs.bar('Giovinazzi', speedGio, color = 'darkred')
line17, = axs.bar('Magnussen', speedMag, color = 'dimgrey')
line18, = axs.bar('Grosjean', speedGro, color = 'dimgrey')
line19, = axs.bar('Kubica', speedKub, color = 'whitesmoke')
line20, = axs.bar('Russell', speedRus, color = 'whitesmoke')

plt.ylim(150, 220)
plt.title("Average top speeds - 2019", fontsize = 20)
plt.ylabel('Km/H', fontsize = 15)
plt.xlabel('Drivers', fontsize = 15)
plt.show()
In [44]:
speedHam2 = (data_20.loc[((data_20['driverId'] == 1))]['fastestLapSpeed'].iloc[:].values.mean())
speedBot2 = (data_20.loc[((data_20['driverId'] == 822))]['fastestLapSpeed'].iloc[:].values.mean())
speedVet2 = (data_20.loc[((data_20['driverId'] == 20))]['fastestLapSpeed'].iloc[:].values.mean())
speedLec2 = (data_20.loc[((data_20['driverId'] == 844))]['fastestLapSpeed'].iloc[:].values.mean())
speedVer2 = (data_20.loc[((data_20['driverId'] == 830))]['fastestLapSpeed'].iloc[:].values.mean())
speedAlb2 = (data_20.loc[((data_20['driverId'] == 848))]['fastestLapSpeed'].iloc[:].values.mean())
speedNor2 = (data_20.loc[((data_20['driverId'] == 846))]['fastestLapSpeed'].iloc[:].values.mean())
speedSai2 = (data_20.loc[((data_20['driverId'] == 832))]['fastestLapSpeed'].iloc[:].values.mean())
speedRic2 = (data_20.loc[((data_20['driverId'] == 817))]['fastestLapSpeed'].iloc[:].values.mean())
speedOco = (data_20.loc[((data_20['driverId'] == 839))]['fastestLapSpeed'].iloc[:].values.mean())
speedGas2 = (data_20.loc[((data_20['driverId'] == 842))]['fastestLapSpeed'].iloc[:].values.mean())
speedKyv2 = (data_20.loc[((data_20['driverId'] == 826))]['fastestLapSpeed'].iloc[:].values.mean())
speedPerez2 = (data_20.loc[((data_20['driverId'] == 815))]['fastestLapSpeed'].iloc[:].values.mean())
speedStr2 = (data_20.loc[((data_20['driverId'] == 840))]['fastestLapSpeed'].iloc[:].values.mean())
speedRai2 = (data_20.loc[((data_20['driverId'] == 8))]['fastestLapSpeed'].iloc[:].values.mean())
speedGio2 = (data_20.loc[((data_20['driverId'] == 841))]['fastestLapSpeed'].iloc[:].values.mean())
speedMag2 = (data_20.loc[((data_20['driverId'] == 825))]['fastestLapSpeed'].iloc[:].values.mean())
speedGro2 = (data_20.loc[((data_20['driverId'] == 154))]['fastestLapSpeed'].iloc[:].values.mean())
speedLat = (data_20.loc[((data_20['driverId'] == 849))]['fastestLapSpeed'].iloc[:].values.mean())
speedRus2 = (data_20.loc[((data_20['driverId'] == 847))]['fastestLapSpeed'].iloc[:].values.mean())

fig, axs = plt.subplots(figsize = (20, 7))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 20)
plt.xticks(rotation = 45)

axs.set_facecolor('k')
axs.patch.set_alpha(0.9)
line1, = axs.bar('Hamilton', speedHam2, color = 'c')
line2, = axs.bar('Bottas', speedBot2, color = 'c')
line3, = axs.bar('Vettel', speedVet2, color = 'r')
line4, = axs.bar('Leclerc', speedLec2, color = 'r')
line5, = axs.bar('Verstappen', speedVer2, color = 'navy')
line6, = axs.bar('Albon', speedAlb2, color = 'navy')
line7, = axs.bar('Norris', speedNor2, color = 'orange')
line8, = axs.bar('Sainz', speedSai2, color = 'orange')
line9, = axs.bar('Ricciardo', speedRic2, color = 'yellow')
line10, = axs.bar('Ocon', speedOco, color = 'yellow')
line11, = axs.bar('Gasly', speedGas2, color = 'white')
line12, = axs.bar('Kvyat', speedKyv2, color = 'white')
line13, = axs.bar('Perez', speedPerez2, color = 'lightpink')
line14, = axs.bar('Stroll', speedStr2, color = 'lightpink')
line15, = axs.bar('Räikkönen', speedRai2, color = 'darkred')
line16, = axs.bar('Giovinazzi', speedGio2, color = 'darkred')
line17, = axs.bar('Magnussen', speedMag2, color = 'dimgrey')
line18, = axs.bar('Grosjean', speedGro2, color = 'dimgrey')
line19, = axs.bar('Latifi', speedLat, color = 'whitesmoke')
line20, = ax.bar('Russell', speedRus2, color = 'whitesmoke')

plt.ylim(150, 230)
plt.title("Average top speeds - 2020", fontsize = 20)
plt.ylabel('Km/H', fontsize = 15)
plt.xlabel('Drivers', fontsize = 15)
plt.show()
In [45]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'constructor', y = 'fastestLapTime', data = data_19)
plt.ylim(50, 120)
plt.title("Fastest Lap times by constructor - 2019",fontsize = 20)
plt.xlabel('Team', fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [46]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'constructor', y = 'fastestLapTime', data = data_20)
plt.ylim(50, 140)
plt.title("Fastest Lap times by constructor - 2020", fontsize = 20)
plt.xlabel('Team', fontsize = 15)
plt.ylabel('Seconds', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [47]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'constructor', y = 'fastestLapSpeed', data = data_19)
plt.ylim(150, 280)
plt.title("Fastest Lap Speed by constructor - 2019",fontsize = 20)
plt.xlabel('Team', fontsize = 15)
plt.ylabel('KM/H', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [48]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.boxplot(x = 'constructor', y = 'fastestLapSpeed', data = data_20)
plt.ylim(150, 280)
plt.title("Fastest Lap Speed by constructor - 2020", fontsize = 20)
plt.xlabel('Team', fontsize = 15)
plt.ylabel('KM/H', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [49]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize = (20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.barplot(x = 'GrandPrix', y = 'fastestLapSpeed', data = data_19)
plt.ylim(125, 275)
plt.title("Circuit vs Fastest Lap Speeds - 2019", fontsize = 20)
plt.xlabel('Grand Prix', fontsize = 15)
plt.ylabel('Km/H', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [50]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize=(20, 8))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.barplot(x = 'GrandPrix', y = 'fastestLapTime', data = data_20)
plt.title("Circuit vs Fastest Lap Times - 2020",fontsize = 20)
plt.xlabel('Grand Prix', fontsize = 15)
plt.ylabel('Secs', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [51]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize=(20, 5))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.barplot(x = 'constructor', y = 'pitStopDuration', data = data_19)
plt.title("Average pitstop time by constructor - 2019",fontsize = 20)
plt.xlabel('Constructor', fontsize = 15)
plt.ylabel('Secs', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()
In [52]:
plt.style.use('dark_background')
fig, axs = plt.subplots(figsize=(20, 5))
axs.tick_params(axis = "x", labelsize = 15)
axs.tick_params(axis = "y", labelsize = 15)
axs.set_facecolor('whitesmoke')

sns.barplot(x = 'constructor', y = 'pitStopDuration', data = data_20)
plt.title("Average pitstop time by constructor - 2020",fontsize = 20)
plt.xlabel('Constructor', fontsize = 15)
plt.ylabel('Secs', fontsize = 15)
plt.xticks(rotation = 45, ha = 'right')
plt.show()

2020 season saw a lot of red flags which caused the average pit stop time to go higher.

Model Building

In [53]:
data_backup = data.copy()

# data_backup.to_csv('final_data.csv', index = False)
# data = pd.read_csv('final_data.csv')
In [54]:
corrplot = data.corr()
plt.style.use("dark_background")
plt.figure(figsize = (20, 10))
sns.heatmap(corrplot, cmap = "Wistia", annot = True)
Out[54]:
<AxesSubplot:>
In [55]:
data = data.drop(['circuitId', 'circuitName', 'lat', 'lng', 'GrandPrix', 'driverRef', 'fullName', 'driverNationality', 'constructor', 
                  'constructorNationality', 'positionOrder'], axis = 1)
data = pd.get_dummies(data = data, prefix = 'circuit', prefix_sep = '_', columns = ['circuitRef'], drop_first = False)
data = pd.get_dummies(data = data, prefix = 'constructor', prefix_sep = '_', columns = ['constructorRef'], drop_first = False)
In [56]:
data.head()
Out[56]:
year roundNum resultId raceId driverId age constructorId q1 q2 q3 grid finishingPos laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins circuit_BAK circuit_adelaide circuit_ain-diab circuit_aintree circuit_albert_park circuit_americas circuit_anderstorp circuit_avus circuit_bahrain circuit_boavista circuit_brands_hatch circuit_bremgarten circuit_buddh circuit_catalunya circuit_charade circuit_dallas circuit_detroit circuit_dijon circuit_donington circuit_essarts circuit_estoril circuit_fuji circuit_galvez circuit_george circuit_hockenheimring circuit_hungaroring circuit_imola circuit_indianapolis circuit_interlagos circuit_istanbul circuit_jacarepagua circuit_jarama circuit_jerez circuit_kyalami circuit_las_vegas circuit_lemans circuit_long_beach circuit_magny_cours circuit_marina_bay circuit_monaco circuit_monsanto circuit_montjuic circuit_monza circuit_mosport circuit_mugello circuit_nivelles circuit_nurburgring circuit_okayama circuit_osterreichring circuit_pedralbes circuit_pescara circuit_phoenix circuit_portimao circuit_red_bull_ring circuit_reims circuit_ricard circuit_riverside circuit_rodriguez circuit_sebring circuit_sepang circuit_shanghai circuit_silverstone circuit_sochi circuit_spa circuit_suzuka circuit_tremblant circuit_valencia circuit_villeneuve circuit_watkins_glen circuit_yas_marina circuit_yeongam circuit_zandvoort circuit_zeltweg circuit_zolder constructor_adams constructor_afm constructor_ags constructor_alfa constructor_alphatauri constructor_alta constructor_amon constructor_apollon constructor_arrows constructor_arzani-volpini constructor_aston_martin constructor_ats constructor_bar constructor_behra-porsche constructor_bellasi constructor_benetton constructor_bmw constructor_bmw_sauber constructor_boro constructor_brabham constructor_brabham-alfa_romeo constructor_brabham-brm constructor_brabham-climax constructor_brabham-ford constructor_brabham-repco constructor_brawn constructor_brm constructor_brm-ford constructor_bromme constructor_brp constructor_bugatti constructor_butterworth constructor_caterham constructor_cisitalia constructor_coloni constructor_connaught constructor_connew constructor_cooper constructor_cooper-alfa_romeo constructor_cooper-ats constructor_cooper-borgward constructor_cooper-brm constructor_cooper-castellotti constructor_cooper-climax constructor_cooper-ferrari constructor_cooper-ford constructor_cooper-maserati constructor_cooper-osca constructor_dallara constructor_de_tomaso-alfa_romeo constructor_de_tomaso-ferrari constructor_de_tomaso-osca constructor_deidt constructor_del_roy constructor_derrington constructor_dunn constructor_eagle-climax constructor_eagle-weslake constructor_elder constructor_emeryson constructor_emw constructor_enb constructor_ensign constructor_epperly constructor_era constructor_eurobrun constructor_ewing constructor_ferguson constructor_ferrari constructor_fittipaldi constructor_fondmetal constructor_footwork constructor_force_india constructor_forti constructor_frazer_nash constructor_fry constructor_gilby constructor_gordini constructor_haas constructor_hall constructor_hesketh constructor_hill constructor_honda constructor_hrt constructor_hwm constructor_iso_marlboro constructor_jaguar constructor_jbw constructor_jordan constructor_kauhsen constructor_klenk constructor_kojima constructor_kurtis_kraft constructor_kuzma constructor_lago constructor_lambo constructor_lancia constructor_langley constructor_larrousse constructor_lds constructor_lds-alfa_romeo constructor_lds-climax constructor_lec constructor_lesovsky constructor_leyton constructor_life constructor_ligier constructor_lola constructor_lotus-borgward constructor_lotus-brm constructor_lotus-climax constructor_lotus-ford constructor_lotus-maserati constructor_lotus-pw constructor_lotus_f1 constructor_lotus_racing constructor_lyncar constructor_maki constructor_manor constructor_march constructor_march-alfa_romeo constructor_march-ford constructor_marchese constructor_martini constructor_marussia constructor_maserati constructor_matra constructor_matra-ford constructor_mbm constructor_mcguire constructor_mclaren constructor_mclaren-alfa_romeo constructor_mclaren-brm constructor_mclaren-ford constructor_mclaren-seren constructor_mercedes constructor_merzario constructor_meskowski constructor_mf1 constructor_milano constructor_minardi constructor_moda constructor_moore constructor_nichels constructor_olson constructor_onyx constructor_osca constructor_osella constructor_pacific constructor_pankratz constructor_parnelli constructor_pawl constructor_penske constructor_phillips constructor_politoys constructor_porsche constructor_prost constructor_protos constructor_racing_point constructor_rae constructor_ram constructor_re constructor_rebaque constructor_red_bull constructor_renault constructor_rial constructor_sauber constructor_scarab constructor_schroeder constructor_scirocco constructor_shadow constructor_shadow-ford constructor_shadow-matra constructor_shannon constructor_sherman constructor_simca constructor_simtek constructor_snowberger constructor_spirit constructor_spyker constructor_spyker_mf1 constructor_stebro constructor_stevens constructor_stewart constructor_super_aguri constructor_surtees constructor_sutton constructor_team_lotus constructor_tec-mec constructor_tecno constructor_theodore constructor_token constructor_toleman constructor_tomaso constructor_toro_rosso constructor_toyota constructor_trevis constructor_trojan constructor_turner constructor_tyrrell constructor_vanwall constructor_veritas constructor_vhristensen constructor_virgin constructor_watson constructor_wetteroth constructor_williams constructor_wolf constructor_zakspeed
20024 1950 1 20025 833 642 59 51 0.0 0.0 0.0 1 1 70 8003600 0 0 0.0 0.0 1 0.0 0.0 0.0 0.0 0.0 9.0 1.0 1.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
20025 1950 1 20026 833 786 52 51 0.0 0.0 0.0 2 2 70 8006200 0 0 0.0 0.0 1 0.0 0.0 0.0 0.0 0.0 6.0 2.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
20026 1950 1 20027 833 686 59 51 0.0 0.0 0.0 4 3 70 8055600 0 0 0.0 0.0 1 0.0 0.0 0.0 0.0 0.0 4.0 3.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
20027 1950 1 20028 833 704 59 154 0.0 0.0 0.0 6 4 68 0 0 0 0.0 0.0 12 0.0 0.0 0.0 0.0 0.0 3.0 4.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
20028 1950 1 20029 833 627 59 154 0.0 0.0 0.0 9 5 68 0 0 0 0.0 0.0 12 0.0 0.0 0.0 0.0 0.0 2.0 5.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
In [57]:
data.tail(20)
Out[57]:
year roundNum resultId raceId driverId age constructorId q1 q2 q3 grid finishingPos laps raceTime fastestLap fastestLapRank fastestLapTime fastestLapSpeed statusId pitStops pitStopDuration constructorPoints constructorPosition constructorWins driverPoints driverPosition driverWins circuit_BAK circuit_adelaide circuit_ain-diab circuit_aintree circuit_albert_park circuit_americas circuit_anderstorp circuit_avus circuit_bahrain circuit_boavista circuit_brands_hatch circuit_bremgarten circuit_buddh circuit_catalunya circuit_charade circuit_dallas circuit_detroit circuit_dijon circuit_donington circuit_essarts circuit_estoril circuit_fuji circuit_galvez circuit_george circuit_hockenheimring circuit_hungaroring circuit_imola circuit_indianapolis circuit_interlagos circuit_istanbul circuit_jacarepagua circuit_jarama circuit_jerez circuit_kyalami circuit_las_vegas circuit_lemans circuit_long_beach circuit_magny_cours circuit_marina_bay circuit_monaco circuit_monsanto circuit_montjuic circuit_monza circuit_mosport circuit_mugello circuit_nivelles circuit_nurburgring circuit_okayama circuit_osterreichring circuit_pedralbes circuit_pescara circuit_phoenix circuit_portimao circuit_red_bull_ring circuit_reims circuit_ricard circuit_riverside circuit_rodriguez circuit_sebring circuit_sepang circuit_shanghai circuit_silverstone circuit_sochi circuit_spa circuit_suzuka circuit_tremblant circuit_valencia circuit_villeneuve circuit_watkins_glen circuit_yas_marina circuit_yeongam circuit_zandvoort circuit_zeltweg circuit_zolder constructor_adams constructor_afm constructor_ags constructor_alfa constructor_alphatauri constructor_alta constructor_amon constructor_apollon constructor_arrows constructor_arzani-volpini constructor_aston_martin constructor_ats constructor_bar constructor_behra-porsche constructor_bellasi constructor_benetton constructor_bmw constructor_bmw_sauber constructor_boro constructor_brabham constructor_brabham-alfa_romeo constructor_brabham-brm constructor_brabham-climax constructor_brabham-ford constructor_brabham-repco constructor_brawn constructor_brm constructor_brm-ford constructor_bromme constructor_brp constructor_bugatti constructor_butterworth constructor_caterham constructor_cisitalia constructor_coloni constructor_connaught constructor_connew constructor_cooper constructor_cooper-alfa_romeo constructor_cooper-ats constructor_cooper-borgward constructor_cooper-brm constructor_cooper-castellotti constructor_cooper-climax constructor_cooper-ferrari constructor_cooper-ford constructor_cooper-maserati constructor_cooper-osca constructor_dallara constructor_de_tomaso-alfa_romeo constructor_de_tomaso-ferrari constructor_de_tomaso-osca constructor_deidt constructor_del_roy constructor_derrington constructor_dunn constructor_eagle-climax constructor_eagle-weslake constructor_elder constructor_emeryson constructor_emw constructor_enb constructor_ensign constructor_epperly constructor_era constructor_eurobrun constructor_ewing constructor_ferguson constructor_ferrari constructor_fittipaldi constructor_fondmetal constructor_footwork constructor_force_india constructor_forti constructor_frazer_nash constructor_fry constructor_gilby constructor_gordini constructor_haas constructor_hall constructor_hesketh constructor_hill constructor_honda constructor_hrt constructor_hwm constructor_iso_marlboro constructor_jaguar constructor_jbw constructor_jordan constructor_kauhsen constructor_klenk constructor_kojima constructor_kurtis_kraft constructor_kuzma constructor_lago constructor_lambo constructor_lancia constructor_langley constructor_larrousse constructor_lds constructor_lds-alfa_romeo constructor_lds-climax constructor_lec constructor_lesovsky constructor_leyton constructor_life constructor_ligier constructor_lola constructor_lotus-borgward constructor_lotus-brm constructor_lotus-climax constructor_lotus-ford constructor_lotus-maserati constructor_lotus-pw constructor_lotus_f1 constructor_lotus_racing constructor_lyncar constructor_maki constructor_manor constructor_march constructor_march-alfa_romeo constructor_march-ford constructor_marchese constructor_martini constructor_marussia constructor_maserati constructor_matra constructor_matra-ford constructor_mbm constructor_mcguire constructor_mclaren constructor_mclaren-alfa_romeo constructor_mclaren-brm constructor_mclaren-ford constructor_mclaren-seren constructor_mercedes constructor_merzario constructor_meskowski constructor_mf1 constructor_milano constructor_minardi constructor_moda constructor_moore constructor_nichels constructor_olson constructor_onyx constructor_osca constructor_osella constructor_pacific constructor_pankratz constructor_parnelli constructor_pawl constructor_penske constructor_phillips constructor_politoys constructor_porsche constructor_prost constructor_protos constructor_racing_point constructor_rae constructor_ram constructor_re constructor_rebaque constructor_red_bull constructor_renault constructor_rial constructor_sauber constructor_scarab constructor_schroeder constructor_scirocco constructor_shadow constructor_shadow-ford constructor_shadow-matra constructor_shannon constructor_sherman constructor_simca constructor_simtek constructor_snowberger constructor_spirit constructor_spyker constructor_spyker_mf1 constructor_stebro constructor_stevens constructor_stewart constructor_super_aguri constructor_surtees constructor_sutton constructor_team_lotus constructor_tec-mec constructor_tecno constructor_theodore constructor_token constructor_toleman constructor_tomaso constructor_toro_rosso constructor_toyota constructor_trevis constructor_trojan constructor_turner constructor_tyrrell constructor_vanwall constructor_veritas constructor_vhristensen constructor_virgin constructor_watson constructor_wetteroth constructor_williams constructor_wolf constructor_zakspeed
24940 2020 17 24946 1047 830 23 9 96.0 95.6 95.2 1 1 55 5788645 14 2 101.0 198.046 1 1.0 21.289000 319.0 2.0 2.0 214.0 3.0 2.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24941 2020 17 24947 1047 822 31 131 95.7 95.5 95.3 2 2 55 5804621 40 3 101.1 197.707 1 1.0 21.587000 573.0 1.0 13.0 223.0 2.0 2.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24942 2020 17 24948 1047 1 35 131 95.5 95.5 95.3 3 3 55 5807060 37 5 101.4 197.144 1 1.0 22.406000 573.0 1.0 13.0 347.0 1.0 11.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24943 2020 17 24949 1047 848 24 9 96.1 95.7 95.6 5 4 55 5808632 42 4 101.2 197.520 1 1.0 21.508000 319.0 2.0 2.0 105.0 7.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24944 2020 17 24950 1047 846 21 1 96.0 95.8 95.5 4 5 55 5849374 53 12 102.0 196.092 1 1.0 21.840000 202.0 3.0 0.0 97.0 9.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24945 2020 17 24951 1047 832 26 1 96.5 96.2 95.8 6 6 55 5854307 48 11 101.9 196.125 1 1.0 22.140000 202.0 3.0 0.0 105.0 6.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24946 2020 17 24952 1047 817 31 4 96.7 96.4 0.0 11 7 55 5862393 55 1 100.9 198.109 1 1.0 22.123000 181.0 5.0 0.0 119.0 5.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24947 2020 17 24953 1047 842 24 213 96.5 96.3 96.2 9 8 55 5878363 53 15 102.5 195.116 1 1.0 22.000000 107.0 7.0 1.0 75.0 10.0 1.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24948 2020 17 24954 1047 839 24 4 96.8 96.4 0.0 10 9 55 5799996 47 18 102.9 194.320 1 1.0 21.615000 181.0 5.0 0.0 62.0 12.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24949 2020 17 24955 1047 840 22 211 96.5 96.1 96.0 8 10 55 5790314 41 9 101.9 196.281 1 1.0 22.036000 195.0 4.0 1.0 75.0 11.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24950 2020 17 24956 1047 826 26 213 96.5 96.2 96.0 7 11 54 0 47 17 102.7 194.679 11 1.0 25.724000 107.0 7.0 1.0 32.0 14.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24951 2020 17 24957 1047 8 41 51 97.6 0.0 0.0 15 12 54 0 53 14 102.1 195.739 11 1.0 22.012000 8.0 8.0 0.0 4.0 16.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24952 2020 17 24958 1047 844 23 6 95.9 95.9 96.1 12 13 54 0 24 10 101.9 196.200 11 1.0 22.178000 131.0 6.0 0.0 98.0 8.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24953 2020 17 24959 1047 20 33 6 96.7 96.6 0.0 13 14 54 0 37 6 101.5 197.037 11 1.0 22.040000 131.0 6.0 0.0 33.0 13.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24954 2020 17 24960 1047 847 22 3 98.0 0.0 0.0 16 15 54 0 54 20 103.3 193.504 11 1.0 22.085000 0.0 10.0 0.0 3.0 18.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
24955 2020 17 24961 1047 841 27 51 97.1 98.2 0.0 14 16 54 0 29 7 101.7 196.650 11 1.0 21.480000 8.0 8.0 0.0 4.0 17.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24956 2020 17 24962 1047 849 25 3 98.4 0.0 0.0 18 17 54 0 49 16 102.5 195.073 11 2.0 21.870500 0.0 10.0 0.0 0.0 21.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
24957 2020 17 24963 1047 825 28 210 97.9 0.0 0.0 20 18 54 0 50 13 102.0 196.025 11 2.0 22.906000 3.0 9.0 0.0 1.0 20.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24958 2020 17 24964 1047 850 24 210 98.2 0.0 0.0 17 19 53 0 50 8 101.7 196.588 12 3.0 26.015667 3.0 9.0 0.0 0.0 23.0 0.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
24959 2020 17 24965 1047 815 30 211 96.0 0.0 0.0 19 0 8 0 6 19 103.3 193.625 7 0.0 0.000000 195.0 4.0 1.0 125.0 4.0 1.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
In [58]:
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 24960 entries, 20024 to 24959
Columns: 310 entries, year to constructor_zakspeed
dtypes: float64(13), int64(14), uint8(283)
memory usage: 12.1 MB
In [59]:
train = data[data['year']<2020]
test = data[data['year']>2019]

x_train = train.drop(['finishingPos'], axis = 1)
y_train = train['finishingPos']

x_test = test.drop(['finishingPos'], axis = 1)
y_test = test['finishingPos']
In [60]:
# scaler = MinMaxScaler()
# x_train = scaler.fit_transform(x_train)
# x_test = scaler.fit_transform(x_test)
In [61]:
xgb_model = xgb.XGBClassifier(n_estimators = 50, random_state = 0, seed = None, silent = True, subsample = 0.6)
xgb_model.fit(x_train, y_train)
[10:46:18] WARNING: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\learner.cc:516: 
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Out[61]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=50, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, seed=0, silent=True,
              subsample=0.6, tree_method='exact', validate_parameters=1,
              verbosity=None)
In [62]:
y_pred = xgb_model.predict(x_test)
In [63]:
print('Model Accuracy is {}'.format(accuracy_score(y_test, y_pred)*1e2))
print(classification_report(y_test, y_pred))
Model Accuracy is 34.705882352941174
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        53
           1       0.75      0.71      0.73        17
           2       0.50      0.59      0.54        17
           3       0.25      0.18      0.21        17
           4       0.40      0.24      0.30        17
           5       0.21      0.29      0.24        17
           6       0.11      0.18      0.14        17
           7       0.19      0.29      0.23        17
           8       0.21      0.18      0.19        17
           9       0.11      0.12      0.11        17
          10       0.21      0.24      0.22        17
          11       0.08      0.06      0.07        17
          12       0.17      0.06      0.09        17
          13       0.00      0.00      0.00        16
          14       0.09      0.13      0.11        15
          15       0.21      0.20      0.21        15
          16       0.29      0.38      0.33        13
          17       0.14      0.17      0.15        12
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         5

    accuracy                           0.35       340
   macro avg       0.25      0.25      0.24       340
weighted avg       0.34      0.35      0.34       340

D:\Programs\Anaconda\lib\site-packages\sklearn\metrics\_classification.py:1221: UndefinedMetricWarning:

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

In [64]:
plt.figure(figsize = (20, 15))
sns.heatmap(confusion_matrix(y_test, y_pred))
Out[64]:
<AxesSubplot:>
In [65]:
output = x_test.copy()
output = output[['year', 'roundNum', 'raceId', 'driverId', 'constructorId', 'grid']]
output['y_pred'] = y_pred
output['y_actual'] = y_test
output = pd.merge(output, drivers[['driverId', 'fullName']], on = 'driverId', how = 'left')
output = pd.merge(output, constructors[['constructorId', 'name']], on = 'constructorId', how = 'left')
output = pd.merge(output, races[['raceId', 'name']], on = 'raceId', how = 'left')
output = output.rename(columns = {'year': 'Year', 'roundNum':'Round', 'grid': 'QualifyingGrid',
                                  'name_x': 'Constructor', 'name_y': 'GrandPrix', 'y_actual': 'FinalPosition',
                                  'y_pred': 'Prediction', 'fullName': 'Driver'}, inplace = False)
output = output[['Year', 'Round', 'GrandPrix', 'Constructor', 'Driver', 'QualifyingGrid', 'FinalPosition', 'Prediction']]
output
# output.to_csv('output.csv', index = False)
Out[65]:
Year Round GrandPrix Constructor Driver QualifyingGrid FinalPosition Prediction
0 2020 1 Austrian Grand Prix Mercedes Valtteri Bottas 1 1 1
1 2020 1 Austrian Grand Prix Ferrari Charles Leclerc 7 2 2
2 2020 1 Austrian Grand Prix McLaren Lando Norris 3 3 3
3 2020 1 Austrian Grand Prix Mercedes Lewis Hamilton 5 4 4
4 2020 1 Austrian Grand Prix McLaren Carlos Sainz 8 5 5
... ... ... ... ... ... ... ... ...
335 2020 17 Abu Dhabi Grand Prix Alfa Romeo Antonio Giovinazzi 14 16 11
336 2020 17 Abu Dhabi Grand Prix Williams Nicholas Latifi 18 17 16
337 2020 17 Abu Dhabi Grand Prix Haas F1 Team Kevin Magnussen 20 18 13
338 2020 17 Abu Dhabi Grand Prix Haas F1 Team Pietro Fittipaldi 17 19 18
339 2020 17 Abu Dhabi Grand Prix Racing Point Sergio Pérez 19 0 0

340 rows × 8 columns

In [66]:
podium = []

for x in data['finishingPos']:
    if x == 1:
        podium.append(1)
    elif x == 2:
        podium.append(1)
    elif x == 3:
        podium.append(1)
    else:
        podium.append(0)

data['podium'] = podium
In [67]:
train = data[data['year']<2020]
test = data[data['year']>2019]

x_train = train.drop(['finishingPos', 'podium'], axis = 1)
y_train = train['podium']

x_test = test.drop(['finishingPos', 'podium'], axis = 1)
y_test = test['podium']
In [68]:
xgb_model = xgb.XGBClassifier(n_estimators = 100, random_state = 0, seed = None, silent = True, subsample = 0.6)
xgb_model.fit(x_train, y_train)
[10:48:41] WARNING: C:\Users\Administrator\workspace\xgboost-win64_release_1.2.0\src\learner.cc:516: 
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Out[68]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0,
              silent=True, subsample=0.6, tree_method='exact',
              validate_parameters=1, verbosity=None)
In [69]:
y_pred = xgb_model.predict(x_test)
In [70]:
print('Model Accuracy is {}'.format(accuracy_score(y_test, y_pred)*1e2))
print(classification_report(y_test, y_pred))
Model Accuracy is 93.52941176470588
              precision    recall  f1-score   support

           0       0.94      0.99      0.96       289
           1       0.89      0.65      0.75        51

    accuracy                           0.94       340
   macro avg       0.92      0.82      0.86       340
weighted avg       0.93      0.94      0.93       340

In [71]:
plt.figure(figsize = (10, 10))
sns.heatmap(confusion_matrix(y_test, y_pred), cmap = "viridis")
Out[71]:
<AxesSubplot:>
In [72]:
output = x_test.copy()
output = output[['year', 'roundNum', 'raceId', 'driverId', 'constructorId', 'grid']]
output['y_pred'] = y_pred
output['y_actual'] = y_test
output = pd.merge(output, drivers[['driverId', 'fullName']], on = 'driverId', how = 'left')
output = pd.merge(output, constructors[['constructorId', 'name']], on = 'constructorId', how = 'left')
output = pd.merge(output, races[['raceId', 'name']], on = 'raceId', how = 'left')
output = output.rename(columns = {'year': 'Year', 'roundNum':'Round', 'grid': 'QualifyingGrid',
                                  'name_x': 'Constructor', 'name_y': 'GrandPrix', 'y_actual': 'FinalPosition',
                                  'y_pred': 'Prediction', 'fullName': 'Driver'}, inplace = False)
output = output[['Year', 'Round', 'GrandPrix', 'Constructor', 'Driver', 'QualifyingGrid', 'FinalPosition', 'Prediction']]
output
# output.to_csv('output.csv', index = False)
Out[72]:
Year Round GrandPrix Constructor Driver QualifyingGrid FinalPosition Prediction
0 2020 1 Austrian Grand Prix Mercedes Valtteri Bottas 1 1 1
1 2020 1 Austrian Grand Prix Ferrari Charles Leclerc 7 1 0
2 2020 1 Austrian Grand Prix McLaren Lando Norris 3 1 1
3 2020 1 Austrian Grand Prix Mercedes Lewis Hamilton 5 0 0
4 2020 1 Austrian Grand Prix McLaren Carlos Sainz 8 0 0
... ... ... ... ... ... ... ... ...
335 2020 17 Abu Dhabi Grand Prix Alfa Romeo Antonio Giovinazzi 14 0 0
336 2020 17 Abu Dhabi Grand Prix Williams Nicholas Latifi 18 0 0
337 2020 17 Abu Dhabi Grand Prix Haas F1 Team Kevin Magnussen 20 0 0
338 2020 17 Abu Dhabi Grand Prix Haas F1 Team Pietro Fittipaldi 17 0 0
339 2020 17 Abu Dhabi Grand Prix Racing Point Sergio Pérez 19 0 0

340 rows × 8 columns